import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas.tools.plotting
import seaborn as sns
import matplotlib
# Install "fuzzywuzzy" and "python-Levenshtein" using pip install
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
%matplotlib inline
plt.style.use('seaborn')
# Install "plotly"
import plotly.offline as offline
offline.init_notebook_mode()
import plotly.graph_objs as go
plt.style.use('ggplot')
pd.options.display.max_rows =300
plt.rc('ytick',labelsize=12)
plt.rc('ytick',labelsize=12)
plt.rc('axes',labelsize=12)
# Switch to the folder holding the survey CSVs so that every relative file
# name used below (inputs and saved figures) resolves there.
# Raw strings keep the Windows backslashes literal; the original relied on
# invalid escape sequences (\m, \D, \E, \H), which produce the same text but
# trigger a warning on current Python versions.
root = r'c:\Users\michael.madaio'
path = r"\Documents\EQUALS\HackerRank Developer Survey"
os.chdir(root + path)
print(os.getcwd())
# --- Load the survey tables (paths are relative to the working directory) ---
codebook = pd.read_csv('HackerRank-Developer-Survey-2018-Codebook.csv')
numeric_mapping = pd.read_csv('HackerRank-Developer-Survey-2018-Numeric-Mapping.csv')
# Both response tables treat '#NULL!' and 'nan' as missing values.
numeric = pd.read_csv(
    'HackerRank-Developer-Survey-2018-Numeric.csv',
    na_values=['#NULL!', 'nan'],
    low_memory=False,
)
values = pd.read_csv(
    'HackerRank-Developer-Survey-2018-Values.csv',
    na_values=['#NULL!', 'nan'],
    low_memory=False,
)
country_mapping = pd.read_csv('Country-Region-Mapping.csv', encoding='latin-1')
values.head()

# Give the codebook short column names and index it by field name so that a
# question's text can be looked up with codebook.loc[<field>]['question'].
codebook.columns = ['fieldname', 'question', 'notes']
codebook = codebook.set_index('fieldname')
numeric_mapping = numeric_mapping.set_index('Data Field')

# Store the two age columns as floats.
for age_column in ('q1AgeBeginCoding', 'q2Age'):
    numeric[age_column] = numeric[age_column].astype(float)

# Replace missing answers with sentinels: -1 in the numeric table,
# 'Not provided' in the text table.
numeric = numeric.fillna(-1)
values = values.fillna('Not provided')
## Fuzzy string matching used to join survey country names to the reference list
# Module-level holders for the most recent match_names() result.
names_array = []
ratio_array = []


def match_names(wrong_names, correct_names):
    """Find the closest entry of *correct_names* for every name in *wrong_names*.

    Each name is passed to ``process.extractOne``; element 0 of its result is
    recorded as the matched name and element 1 as the match score.

    The module-level ``names_array`` / ``ratio_array`` lists are overwritten
    in place with the result of this call. The original appended to them, so
    calling the function a second time returned the earlier results as well.

    Args:
        wrong_names: iterable of names to look up.
        correct_names: collection of candidate names to match against.

    Returns:
        The tuple ``(names_array, ratio_array)``, aligned with *wrong_names*.
    """
    best_matches = [process.extractOne(name, correct_names) for name in wrong_names]
    # Slice assignment keeps the same list objects while discarding old content.
    names_array[:] = [match[0] for match in best_matches]
    ratio_array[:] = [match[1] for match in best_matches]
    return names_array, ratio_array
# Distinct country labels as they appear in the survey responses.
survey_names = values['CountryNumeric2'].dropna().unique()
# Reference list of correctly spelled country names.
correct_names = country_mapping['name'].values

# One-off step that produced fixed_country_names.csv (kept for reference):
# name_match,ratio_match=match_names(survey_names,correct_names)
# np.savetxt("fixed_country_names.csv", name_match, delimiter=",", fmt='%s')
fixed = pd.read_csv("fixed_country_names.csv", header=None, encoding="ISO-8859-1")

# Pair every survey spelling ('Old') with its corrected name ('New').
# The pairing is positional: row i of the CSV goes with survey_names[i].
d = dict(New=fixed[0], Old=survey_names)
cross_ref = pd.DataFrame(d)

# Attach the corrected name to each respondent and call the column CountryName.
corrected_by_survey_name = cross_ref.set_index('Old')
values = values.join(corrected_by_survey_name, on='CountryNumeric2')
values = values.rename(index=str, columns={"New": "CountryName"})

# Attach the region columns of the mapping table, keyed by country name.
mapping_by_name = country_mapping.set_index('name')
values = values.join(mapping_by_name, on='CountryName')
# Gender: print the tally of text answers, then chart the numeric codes.
gender_tally = values["q3Gender"].value_counts()
print(gender_tally)

sns.set(font_scale=1)
count = sns.countplot(x='q3Gender', data=numeric)
# Tick labels: 'Not provided' first, followed by the second column of the
# q3Gender rows in the numeric mapping.
gender_labels = numeric_mapping.loc['q3Gender'].values[:, 1]
count.set_xticklabels(np.append(['Not provided'], gender_labels))
count.set_xlabel('Gender')
count.set_ylabel('Count')
count.set_title("HackerRank: Gender Response")

fig = count.get_figure()
fig.savefig('HR_gender_barplot.jpg')
print("")
# Clean country data and find gender ratio
# Only use countries with > 50 respondents
threshold = 50
counts = values["CountryName"].value_counts()
country_min = values.loc[values["CountryName"].isin(counts[counts > threshold].index), :]

# Female share = female / (female + male), per country.
female = country_min[country_min['q3Gender'] == 'Female']['CountryName'].value_counts()
male = country_min[country_min['q3Gender'] == 'Male']['CountryName'].value_counts()
# Parenthesised so that dropna() applies to the final ratio. The original
# `female/(female+male).dropna()` only cleaned the denominator and could leave
# NaN entries in female_share. A country present in only one of the two counts
# has no ratio and is dropped.
female_share = (female / (female + male)).dropna()

# Drop respondents that didn't provide gender
values = values[values["q3Gender"] != "Not provided"]
# Notebook leftovers: number of distinct countries before/after the threshold.
len(values["CountryName"].value_counts())
len(country_min["CountryName"].value_counts())

# Bar chart: total respondents for the countries above the threshold.
country_total = country_min['CountryName'].value_counts().plot(
    kind='bar',
    figsize=(12, 10),
    title="HackerRank: Total Respondents (with more than 50 total)",
)
# Rotate the tick labels BEFORE saving; the original did it after savefig,
# which was too late to affect the saved image.
plt.xticks(rotation=90)
fig = country_total.get_figure()
fig.savefig('HR_countries_total-min50_barplot.jpg')

# Bar chart: female share per country, smallest first.
female_share_countries = female_share.sort_values().plot(
    kind='bar',
    figsize=(15, 10),
    title="HackerRank: % Female Respondents",
)
fig = female_share_countries.get_figure()
fig.savefig('HR_countries_female-share_barplot.jpg')
# Respondents per region: print the tally, then draw and save a count plot.
region_tally = values['region'].value_counts()
print(region_tally)

sns.set(font_scale=1)
count = sns.countplot(x='region', data=values)
count.set_xlabel('Region')
count.set_ylabel('Count')
count.set_title("HackerRank: Regional Total # of Respondents")

fig = count.get_figure()
fig.savefig('HR_regional_total_barplot.jpg')
print("")
## Before mapping, only use respondents that listed a country
only_names = values[
    (values["CountryName"] != "Not provided")
    & (values["CountryName"] != "Asia/Pacific Region")
    & (values["CountryName"] != "Europe")
]

# Number of respondents per country
country = only_names['CountryName'].value_counts()

# Plotly choropleth of respondent counts, located by country name.
data = [dict(
    type='choropleth',
    locations=country.index,
    locationmode='country names',
    z=country.values,
    text=('Count' + '<br>'),
    colorscale='Jet',
    reversescale=False,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    colorbar=dict(title='HackerRank: Total Respondents by Country'),
)]
# Plotly's projection types are lowercase; the original 'Mercator' is not one
# of the accepted values.
layout = dict(
    title='Number of respondents by country',
    geo=dict(showframe=False, projection=dict(type='mercator')),
)
fig = dict(data=data, layout=layout)
# NOTE(review): the output is named "HR_female_total" although this map shows
# all respondents; name left unchanged so existing links keep working.
offline.plot(fig, filename="HR_female_total.html", image_filename="HR_female_total", image='png')
print("Open this map: HR_female_total.png")
### Map of Percent Female Respondents by Country
# Plotly choropleth of female_share (female / (female + male)) per country.
data = [dict(
    type='choropleth',
    locations=female_share.index,
    locationmode='country names',
    z=female_share.values,
    text='% female',
    colorscale=[
        [0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"],
    ],
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    # The colour axis is a share, not a count (the original label said
    # 'Response count').
    colorbar=dict(title='Female share of respondents'),
)]
# Plotly's projection types are lowercase; the original 'Mercator' is not one
# of the accepted values.
layout = dict(
    title='HackerRank: Percent Female Response by Country',
    geo=dict(showframe=False, showcoastlines=True, projection=dict(type='mercator')),
)
fig = dict(data=data, layout=layout)
offline.plot(fig, filename="HR_female_percent.html", image_filename="HR_female_percent", image='png')
print("Open this map: HR_female_percent.png")
# One DataFrame per region, selected on the 'region' column.
region_column = values["region"]
asia = values[region_column == "Asia"]
americas = values[region_column == "Americas"]
europe = values[region_column == "Europe"]
africa = values[region_column == "Africa"]
oceania = values[region_column == "Oceania"]

# One DataFrame per case-study country, selected on 'CountryName'.
country_column = values["CountryName"]
indonesia = values[country_column == "Indonesia"]
argentina = values[country_column == "Argentina"]
south_africa = values[country_column == "South Africa"]
pakistan = values[country_column == "Pakistan"]
# Heatmap helper shared by all the question-vs-gender charts below
def draw_heatmap(column1, column2, title=None, annot=True, ax=None, size=(10, 10), data=values):
    """Draw a heatmap of *column1* (rows) against *column2* (columns).

    The cells come from a cross-tabulation normalised per column, so each
    column of the heatmap adds up to 1.

    Args:
        column1: column of *data* shown on the rows.
        column2: column of *data* shown on the columns; the x-axis label is
            this field's question text from ``codebook``.
        title: optional axes title; nothing is set when ``None``.
        annot: forwarded to ``sns.heatmap`` (write the value in each cell).
        ax: axes to draw on; a new figure of *size* is created when ``None``.
        size: figure size, used only when *ax* is ``None``.
        data: DataFrame to tabulate. The default is whatever ``values`` was
            bound to when this ``def`` was executed.
    """
    proportions = pd.crosstab(data[column1], data[column2], normalize="columns")
    if ax is None:
        _, ax = plt.subplots(figsize=size)
    sns.heatmap(proportions, cmap='Reds', fmt='g', annot=annot, ax=ax)
    # The y-axis label is left as set by seaborn.
    ax.set_xlabel(codebook.loc[column2]['question'])
    if title is None:
        return
    ax.set_title(title)
# Fix age category data
# Missing answers were filled with -1; move them to 0 because the real
# category codes start at 1.
for age_column in ('q1AgeBeginCoding', 'q2Age'):
    numeric.loc[numeric[age_column] == -1, age_column] = 0

# Short q2Age labels (codes 1..9) so that the text fits on the plots.
age_labels = ['Under 12', '12 - 18', '18 - 24', '25 - 34', '35 - 44', '45 - 54', '55 - 64', '65 - 74', '75+']
numeric_mapping.loc['q2Age'] = [[code, label] for code, label in enumerate(age_labels, start=1)]

# Strip the 'years old' suffix from the q1AgeBeginCoding labels.
begin_coding_rows = numeric_mapping.loc['q1AgeBeginCoding']
numeric_mapping.loc['q1AgeBeginCoding'] = begin_coding_rows.applymap(lambda x: str(x).replace('years old', ''))

## Separate female/male DataFrames for the per-gender heatmaps
gender_column = values["q3Gender"]
women = values[gender_column == "Female"]
men = values[gender_column == "Male"]
# Side-by-side heatmaps: current age (left) and age began coding (right),
# each broken down by gender.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))
sns.set()
for panel, age_field in zip(ax, ('q2Age', 'q1AgeBeginCoding')):
    draw_heatmap(age_field, 'q3Gender', ax=panel, annot=True, data=values)
fig.suptitle("HackerRank: Age and Years of Coding, by Gender")
fig.tight_layout()
# Leave room above the panels for the figure title.
plt.subplots_adjust(top=0.92)
plt.savefig('HR_age_years-coding_gender.jpg')
# Age began coding vs. current age, one panel per gender (women left, men right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(23, 10))
fig.suptitle("HackerRank: Age and Years of Coding, by Gender")
sns.set()
for panel, subset, panel_title in zip(ax, (women, men), ('Women', 'Men')):
    draw_heatmap('q1AgeBeginCoding', 'q2Age', ax=panel, annot=True, data=subset)
    # Replace the labels set by the heatmap with short ones.
    panel.set_xlabel("Age")
    panel.set_ylabel("q1AgeBeginCoding")
    panel.set_title(panel_title)
fig.tight_layout()
# Leave room above the panels for the figure title.
plt.subplots_adjust(top=0.92)
plt.savefig('HR_age_years-coding_gender_combined.jpg')
# Education level: overall distribution (left) and breakdown by gender (right)
print('Q:', codebook.loc['q4Education']['question'], "\n")
f, ax = plt.subplots(1, 2, figsize=(16, 6))
st_age = values['q4Education'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=st_age.values, y=st_age.index, palette='Wistia', ax=ax[0])
ax[0].set_title('Distribution of Education')
ax[0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(st_age.values):
    ax[0].text(0.8, i, v, color='k', fontsize=12)
sns.set()
draw_heatmap('q4Education', 'q3Gender', ax=ax[1], annot=True)
plt.subplots_adjust(wspace=0.7)
plt.suptitle("HackerRank: Education, by Gender")
plt.savefig('HR_education_gender.jpg')
# Education level: overall distribution plus one gender heatmap per region
print('Q:', codebook.loc['q4Education']['question'], "\n")
f, ax = plt.subplots(3, 2, figsize=(16, 10))
st_age = values['q4Education'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=st_age.values, y=st_age.index, palette='Wistia', ax=ax[0][0])
ax[0][0].set_title('Distribution of Education')
ax[0][0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(st_age.values):
    ax[0][0].text(0.8, i, v, color='k', fontsize=12)
# Remaining five panels: one region each.
region_panels = [
    (ax[0][1], africa, 'Africa'),
    (ax[1][0], americas, 'Americas'),
    (ax[1][1], asia, 'Asia'),
    (ax[2][0], europe, 'Europe'),
    (ax[2][1], oceania, 'Oceania'),
]
for panel, region_data, region_title in region_panels:
    sns.set()
    draw_heatmap('q4Education', 'q3Gender', ax=panel, annot=True, data=region_data)
    panel.set_title(region_title)
plt.subplots_adjust(hspace=0.6, wspace=0.7, top=0.92)
plt.suptitle("HackerRank: Education, by Gender and Region")
plt.savefig('HR_education_gender_region.jpg')
# Sample sizes of the four case-study countries.
print("Argentina:", len(argentina), "\nIndonesia:", len(indonesia), "\nSouth Africa:", len(south_africa), "\nPakistan", len(pakistan))

# Education level by gender: one heatmap per case-study country (2x2 grid)
print('Q:', codebook.loc['q4Education']['question'], "\n")
f, ax = plt.subplots(2, 2, figsize=(16, 10))
question = 'q4Education'
country_panels = (
    (ax[0][0], argentina, 'Argentina'),
    (ax[0][1], indonesia, 'Indonesia'),
    (ax[1][0], south_africa, 'South Africa'),
    (ax[1][1], pakistan, 'Pakistan'),
)
for panel, country_data, country_title in country_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=country_data)
    panel.set_title(country_title)
plt.suptitle("HackerRank: Education by Gender, for 4 Countries")
plt.subplots_adjust(hspace=0.4, wspace=0.7, top=0.92)
plt.savefig('HR_education_gender_4countries.jpg')
# Industry: overall distribution (left) and breakdown by gender (right)
print('Q:', codebook.loc['q10Industry']['question'], "\n")
f, ax = plt.subplots(1, 2, figsize=(16, 10))
ind = values['q10Industry'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=ind.values, y=ind.index, palette='Wistia', ax=ax[0])
# Title typo fixed ('Indusry' -> 'Industry').
ax[0].set_title('Distribution of Industry')
ax[0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(ind.values):
    ax[0].text(0.8, i, v, color='k', fontsize=12)
sns.set()
draw_heatmap('q10Industry', 'q3Gender', ax=ax[1], annot=True)
plt.subplots_adjust(wspace=0.7, top=0.92)
plt.suptitle("HackerRank: Industry by Gender")
plt.savefig('HR_industry_gender.jpg')
# Industry: overall distribution plus one gender heatmap per region
print('Q:', codebook.loc['q10Industry']['question'], "\n")
f, ax = plt.subplots(3, 2, figsize=(16, 17))
ind = values['q10Industry'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=ind.values, y=ind.index, palette='Wistia', ax=ax[0][0])
# Title typo fixed ('Indusry' -> 'Industry').
ax[0][0].set_title('Distribution of Industry')
ax[0][0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(ind.values):
    ax[0][0].text(0.8, i, v, color='k', fontsize=12)
question = 'q10Industry'
# Remaining five panels: one region each.
region_panels = [
    (ax[0][1], africa, 'Africa'),
    (ax[1][0], americas, 'Americas'),
    (ax[1][1], asia, 'Asia'),
    (ax[2][0], europe, 'Europe'),
    (ax[2][1], oceania, 'Oceania'),
]
for panel, region_data, region_title in region_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=region_data)
    panel.set_title(region_title)
plt.subplots_adjust(wspace=0.7, top=0.95)
plt.suptitle("HackerRank: Industry by Gender, Region")
plt.savefig('HR_industry_gender_region.jpg')
# Industry by gender: one heatmap per case-study country (2x2 grid)
print('Q:', codebook.loc['q10Industry']['question'], "\n")
f, ax = plt.subplots(2, 2, figsize=(16, 14))
question = 'q10Industry'
country_panels = (
    (ax[0][0], argentina, 'Argentina'),
    (ax[0][1], indonesia, 'Indonesia'),
    (ax[1][0], south_africa, 'South Africa'),
    (ax[1][1], pakistan, 'Pakistan'),
)
for panel, country_data, country_title in country_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=country_data)
    panel.set_title(country_title)
plt.subplots_adjust(wspace=0.7, top=0.95)
plt.suptitle("HackerRank: Industry by Gender, for 4 Countries")
plt.savefig('HR_industry_gender_4countries.jpg')
# Job level: overall distribution (left) and breakdown by gender (right)
print('Q:', codebook.loc['q8JobLevel']['question'], "\n")
f, ax = plt.subplots(1, 2, figsize=(16, 6))
job = values['q8JobLevel'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=job.values, y=job.index, palette='Wistia', ax=ax[0])
ax[0].set_xlabel('Count')
ax[0].set_title('Distribution of Job Level')
# Write each bar's count next to the axis origin.
for i, v in enumerate(job.values):
    ax[0].text(0.8, i, v, color='k', fontsize=12)
sns.set()
draw_heatmap('q8JobLevel', 'q3Gender', ax=ax[1], annot=True)
plt.subplots_adjust(wspace=0.4)
plt.suptitle("HackerRank: Job Level by Gender")
# The original called fig.tight_layout() here, but this figure is `f`; `fig`
# still referred to an object created earlier in the file, so the call never
# touched this plot. It is dropped so the spacing set above stays in effect.
plt.savefig('HR_joblevel_gender.jpg')
# Job level: overall distribution plus one gender heatmap per region
print('Q:', codebook.loc['q8JobLevel']['question'], "\n")
f, ax = plt.subplots(3, 2, figsize=(16, 15))
job = values['q8JobLevel'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=job.values, y=job.index, palette='Wistia', ax=ax[0][0])
ax[0][0].set_xlabel('Count')
ax[0][0].set_title('Distribution of Job Level')
# Write each bar's count next to the axis origin.
for i, v in enumerate(job.values):
    ax[0][0].text(0.8, i, v, color='k', fontsize=12)
question = 'q8JobLevel'
# Remaining five panels: one region each.
region_panels = [
    (ax[0][1], africa, 'Africa'),
    (ax[1][0], americas, 'Americas'),
    (ax[1][1], asia, 'Asia'),
    (ax[2][0], europe, 'Europe'),
    (ax[2][1], oceania, 'Oceania'),
]
for panel, region_data, region_title in region_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=region_data)
    panel.set_title(region_title)
plt.subplots_adjust(hspace=0.25, wspace=0.7, top=0.95)
plt.suptitle("HackerRank: Job Level by Gender, Region")
# The original called fig.tight_layout() here, but this figure is `f`; `fig`
# still referred to an object created earlier in the file, so the call never
# touched this plot. It is dropped so the spacing set above stays in effect.
plt.savefig('HR_joblevel_gender_region.jpg')
# Job level by gender: one heatmap per case-study country (2x2 grid)
print('Q:', codebook.loc['q8JobLevel']['question'], "\n")
f, ax = plt.subplots(2, 2, figsize=(16, 15))
question = 'q8JobLevel'
country_panels = (
    (ax[0][0], argentina, 'Argentina'),
    (ax[0][1], indonesia, 'Indonesia'),
    (ax[1][0], south_africa, 'South Africa'),
    (ax[1][1], pakistan, 'Pakistan'),
)
for panel, country_data, country_title in country_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=country_data)
    panel.set_title(country_title)
plt.subplots_adjust(hspace=0.2, wspace=0.7, top=0.95)
plt.suptitle("HackerRank: Job Level by Gender, for 4 Countries")
plt.savefig('HR_joblevel_gender_4countries.jpg')
# Share of female senior developers by country
senior_dev = only_names[only_names['q8JobLevel'] == 'Senior developer']
female = senior_dev[senior_dev['q3Gender'] == 'Female']['CountryName'].value_counts()
male = senior_dev[senior_dev['q3Gender'] == 'Male']['CountryName'].value_counts()
# A country present in only one of the two counts has no ratio and is dropped.
country_share = (female / (female + male)).dropna()

# Plotly choropleth of the share, located by country name.
data = [dict(
    type='choropleth',
    locations=country_share.index,
    locationmode='country names',
    z=country_share.values,
    text='% female',
    colorscale=[
        [0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"],
    ],
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    # The colour axis is a share, not a count (the original label said
    # 'Response count').
    colorbar=dict(title='Female share of senior developers'),
)]
# Plotly's projection types are lowercase; the original 'Mercator' is not one
# of the accepted values.
layout = dict(
    title='HackerRank: Percent of Female Senior Developers',
    geo=dict(showframe=False, showcoastlines=True, projection=dict(type='mercator')),
)
fig = dict(data=data, layout=layout)
offline.plot(fig, filename="HR_female_percent_senior.html", image_filename="HR_female_percent_senior", image='png')
print("Open this map: HR_female_percent_senior.png")
# Hiring managers: overall distribution (left) and breakdown by gender (right)
print('Q?:', codebook.loc['q16HiringManager']['question'])
f, ax = plt.subplots(1, 2, figsize=(16, 2))
hm = values['q16HiringManager'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=hm.values, y=hm.index, palette='Wistia', ax=ax[0])
ax[0].set_title('Distribution of Hiring Manager')
ax[0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(hm.values):
    ax[0].text(0.8, i, v, color='k', fontsize=12)
sns.set()
draw_heatmap('q16HiringManager', 'q3Gender', ax=ax[1], annot=True)
plt.subplots_adjust(wspace=0.7, top=0.75)
plt.suptitle("HackerRank: Hiring Role by Gender")
plt.savefig('HR_hiring_gender.jpg')
# Hiring managers: overall distribution plus one gender heatmap per region
print('Q:', codebook.loc['q16HiringManager']['question'], "\n")
f, ax = plt.subplots(3, 2, figsize=(15, 7))
hm = values['q16HiringManager'].value_counts()
# x/y are passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.barplot(x=hm.values, y=hm.index, palette='Wistia', ax=ax[0][0])
ax[0][0].set_title('Distribution of Hiring Manager')
ax[0][0].set_xlabel('Count')
# Write each bar's count next to the axis origin.
for i, v in enumerate(hm.values):
    ax[0][0].text(0.8, i, v, color='k', fontsize=12)
question = 'q16HiringManager'
# Remaining five panels: one region each.
region_panels = [
    (ax[0][1], africa, 'Africa'),
    (ax[1][0], americas, 'Americas'),
    (ax[1][1], asia, 'Asia'),
    (ax[2][0], europe, 'Europe'),
    (ax[2][1], oceania, 'Oceania'),
]
for panel, region_data, region_title in region_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=region_data)
    panel.set_title(region_title)
plt.subplots_adjust(hspace=0.6, wspace=0.7, top=0.90)
plt.suptitle("HackerRank: Hiring Role by Gender, Region")
plt.savefig('HR_hiring_gender_region.jpg')
# Hiring managers by gender: one heatmap per case-study country (2x2 grid)
print('Q:', codebook.loc['q16HiringManager']['question'], "\n")
f, ax = plt.subplots(2, 2, figsize=(15, 7))
question = 'q16HiringManager'
country_panels = (
    (ax[0][0], argentina, 'Argentina'),
    (ax[0][1], indonesia, 'Indonesia'),
    (ax[1][0], south_africa, 'South Africa'),
    (ax[1][1], pakistan, 'Pakistan'),
)
for panel, country_data, country_title in country_panels:
    sns.set()
    draw_heatmap(question, 'q3Gender', ax=panel, annot=True, data=country_data)
    panel.set_title(country_title)
plt.subplots_adjust(hspace=0.6, wspace=0.7, top=0.90)
plt.suptitle("HackerRank: Hiring Role by Gender, for 4 Countries")
plt.savefig('HR_hiring_gender_4countries.jpg')
# Map of share of female hiring managers
hiring_managers = only_names[only_names['q16HiringManager'] == 'Yes']
female = hiring_managers[hiring_managers['q3Gender'] == 'Female']['CountryName'].value_counts()
male = hiring_managers[hiring_managers['q3Gender'] == 'Male']['CountryName'].value_counts()
# A country present in only one of the two counts has no ratio and is dropped.
country_share = (female / (female + male)).dropna()

# Plotly choropleth of the share, located by country name.
data = [dict(
    type='choropleth',
    locations=country_share.index,
    locationmode='country names',
    z=country_share.values,
    text='% female',
    colorscale=[
        [0, "rgb(5, 10, 172)"], [0.35, "rgb(40, 60, 190)"], [0.5, "rgb(70, 100, 245)"],
        [0.6, "rgb(90, 120, 245)"], [0.7, "rgb(106, 137, 247)"], [1, "rgb(220, 220, 220)"],
    ],
    reversescale=True,
    marker=dict(line=dict(color='rgb(180,180,180)', width=0.5)),
    # The colour axis is a share, not a count (the original label said
    # 'Response count').
    colorbar=dict(title='Female share of hiring managers'),
)]
# Plotly's projection types are lowercase; the original 'Mercator' is not one
# of the accepted values.
layout = dict(
    title='HackerRank: Percent of Female Hiring Managers',
    geo=dict(showframe=False, showcoastlines=True, projection=dict(type='mercator')),
)
fig = dict(data=data, layout=layout)
offline.plot(fig, filename="HR_female_percent_hiring.html", image_filename="HR_female_percent_hiring", image='png')
print("Open this map: HR_female_percent_hiring.png")